/* -------------------------------------------------------------------------
 * Copyright (c) 2021 Huawei Technologies Co.,Ltd.
 *
 * openGauss is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 * -------------------------------------------------------------------------
 *
 * func_hardcoded_values.cpp
 *
 * When creating a function definition (function, procedure, or anonymous block), you must encrypt the plaintext value
 * to be encrypted on the client so that the original value is not explicitly sent to the server.
 * We need to support both SQL and PL/PGSQL languages.
 * For the SQL language, the syntax body can be parsed as a whole.
 * For the PL/PGSQL language, because the syntax has control blocks, you need to customize a splitter to split
 * the query syntax into a query array, and then parse each query in the query array separately.
 *
 * IDENTIFICATION
 *  src\common\interfaces\libpq\client_logic_processor\func_hardcoded_values.cpp
 *
 * -------------------------------------------------------------------------
 */
#include "nodes/parsenodes_common.h"
#include "client_logic_common/statement_data.h"
#include "client_logic_common/client_logic_utils.h"
#include "func_hardcoded_values.h"
#include "stmt_processor.h"
#include "libpq-fe.h"

/*
 * @description the main function to do the processing of the hardcoded values from top to bottom
 * @param[in] options - list of DefElem nodes - they are derieved either from the CreateFunctionStmt object or the
 *  DoStmt object
 * @param[in] StatementData - the current client logic state machine for the query
 * @param[in] is_do_stmt - did the parser recognize this query as a DoStmt node
 * @return boolean, returns true or false for severe, unexpected errors
 */
bool FuncHardcodedValues::process(const List *options, StatementData *statement_data, bool is_do_stmt)
{
    char *body = NULL;
    size_t body_size = 0;
    size_t body_location = 0;
    size_t delimiter_location = 0;
    size_t delimiter_size = 0;
    const char *language = NULL;
    if (!process_options(options, is_do_stmt, &body, &delimiter_location, &delimiter_size, &body_location, &body_size,
        &language, statement_data)) {
        return false;
    }

    /* the function language is not supported by our feature so just quit */
    if (!language) {
        return true;
    }

    Body **bodies = NULL;
    size_t bodies_size = 0;

    /*
     * in case of PL/PGSQL we have to split the batch query to multiple queries because we only support SQL queries (we
     * only have a SQL bison parser) in case of SQL, we use the entire batch query in a bulk
     */
    if (pg_strncasecmp(language, "sql", strlen("sql")) == 0) {
        bodies = make_sql_body(body, body_size, body_location, &bodies_size);
    } else if (pg_strncasecmp(language, "plpgsql", strlen("plpgsql")) == 0) {
        bodies = split_body(body, body_size, body_location, &bodies_size);
    }
    if (!bodies) {
        return true;
    }

    /* process hardcoded values in each query */
    size_t body_location_offset = 0;
    for (size_t i = 0; i < bodies_size; ++i) {
        if (!parse_body(language, bodies[i], delimiter_location, delimiter_size, &body_location_offset,
            statement_data)) {
            return false;
        }
    }

    return true;
}

/**
 * @description extract the objects from the query generated by CREATE FUNCTION, CREATE PROCEDURE or DO
 * @param[in] options - list of DefElem nodes - they are derieved either from the CreateFunctionStmt object or the
 *                      DoStmt object
 * @param[in] is_do_stmt - did the parser recognize this query as a DoStmt node
 * @param[out] body - the query body
 * @param[out] delimiter_location - the location or position of where the query openining delimiter starts in the CREATE
 *                                  FUNCTION query
 * @param[out] delimiter_size - the query size from the opening delimiter to the enclosing delimiter
 * @param[out] body_location - the location or position of where the query body starts in the CREATE FUNCTION query
 * @param[out] body_size - the query body size
 * @param[out] lang - the language used in the current function. supported language include SQL or PL/PGSQL
 * @param[in] StatementData - the current client logic state machine for the query
 * @return boolean, returns true or false for severe, unexpected errors
 */
bool FuncHardcodedValues::process_options(const List *options, bool is_do_stmt, char **body, size_t *delimiter_location,
    size_t *delimiter_size, size_t *body_location, size_t *body_size, const char **language,
    const StatementData *statement_data)
{
    DefElem *as_item = NULL;
    DefElem *language_item = NULL;
    if (!extract_def_elem_options(options, is_do_stmt, &as_item, &language_item, delimiter_location, delimiter_size,
        statement_data)) {
        return false;
    }
    if (!process_def_elem_options(as_item, is_do_stmt, language_item, body, delimiter_location, delimiter_size,
        body_location, body_size, language, statement_data)) {
        return false;
    }
    return true;
}

/*
 * @description fetch the function body string that the parser stored in the AS element
 * @param[in] as_item - AS element
 * @return the body string held by the AS element, or NULL when the element is missing, holds neither a List nor a
 *  String, or the string is empty
 */
const char *FuncHardcodedValues::process_def_body_parser(DefElem *as_item)
{
    if (as_item == NULL) {
        Assert(false);
        return NULL;
    }

    const char *parsed_body = NULL;
    if (IsA(as_item->arg, String)) {
        /* anonymous block: the argument is the string itself */
        parsed_body = strVal(as_item->arg);
    } else if (IsA(as_item->arg, List)) {
        /* function creation: the body is the first element of the list */
        parsed_body = strVal(linitial((List *)(as_item->arg)));
    }

    /* an absent or empty body is reported the same way */
    const bool is_unusable = (parsed_body == NULL) || (parsed_body[0] == '\0');
    return is_unusable ? NULL : parsed_body;
}

/*
 * @description determine the language name of the body definition
 * @param[in] language_item - the LANGUAGE element (may be NULL for a DoStmt)
 * @param[in] is_do_stmt - whether the function was called through a DoStmt flow
 * @return the string held by the LANGUAGE element when there is one; "plpgsql" for a DoStmt without a LANGUAGE
 *  element; NULL when the element is missing outside of a DoStmt flow
 */
const char *FuncHardcodedValues::process_def_language(DefElem *language_item, bool is_do_stmt)
{
    /* an explicit LANGUAGE element always wins */
    if (language_item != NULL) {
        return strVal(language_item->arg);
    }

    /* only a DoStmt is allowed to come without a LANGUAGE element */
    if (!is_do_stmt) {
        Assert(false);
        return NULL;
    }

    return (const char *)"plpgsql";
}

/*
 * @description a helper function to skip white space - used for getting to the delimiter ($$, etc.)
 *  for every skipped character the location is advanced by one and the remaining size is reduced by one
 * @param[IN] query - the original query sent by the client
 * @param[IN/OUT] location - the current offset in the query
 * @param[IN/OUT] size - the size of the query after the location
 * @return the pointer into the query that the updated location refers to, otherwise NULL in errors
 */
const char *FuncHardcodedValues::process_def_skip_spaces(const char *query, size_t *location, size_t *size)
{
    if (!query || !location || !size) {
        Assert(false);
        return NULL;
    }

    const char *body_start = query + *location;

    /*
     * - stop once the remaining size is used up, otherwise the unsigned size would wrap around
     * - isspace() is only defined for values representable as unsigned char (or EOF), so the character must be
     *   converted before the call; a plain char may be negative for non-ASCII bytes
     * the terminating NUL is not white space, so the scan never runs past the end of the query
     */
    while (*size > 0 && isspace((unsigned char)*body_start)) {
        ++body_start;
        *location += 1;
        *size -= 1;
    }
    return body_start;
}

/*
 * @description step over a leading DO keyword (and the white space following it) to get to the delimiter ($$, etc.)
 * @param[IN] query - the original query sent by the client
 * @param[IN/OUT] location - the current offset in the query
 * @param[IN/OUT] size - the size of the query after the location
 * @param[IN] is_do_stmt - whether the function was called from a DoStmt flow
 * @return a pointer into the query at the (possibly updated) location, otherwise NULL in errors
 */
const char *FuncHardcodedValues::process_def_skip_do_cmd(const char *query, size_t *location, size_t *size,
    bool is_do_stmt)
{
    if (query == NULL || location == NULL || size == NULL) {
        Assert(false);
        return NULL;
    }

    const size_t do_keyword_len = strlen("DO");
    const char *cursor = query + *location;

    /* nothing to skip unless this is a DoStmt whose text really starts with the keyword */
    if (!is_do_stmt || pg_strncasecmp((const char *)cursor, "DO", do_keyword_len) != 0) {
        return cursor;
    }

    *location += do_keyword_len;
    *size -= do_keyword_len;

    /* the helper returns NULL on errors, which is passed on to the caller unchanged */
    return process_def_skip_spaces(query, location, size);
}

/*
 * @description narrow the body down to the text between the hyphen (single quote) delimiters, and shrink the
 *  delimiter size so that it ends at the closing delimiter
 * @param[IN/OUT] delimiter_size - the size of the query from the beginning of the opening delimiter to the end of the
 *  closing delimiter; reduced by one for every character found after the closing delimiter
 * @param[IN] body_start - a pointer to the query at body_location; expected to point at the opening delimiter
 * @param[IN/OUT] body_location - the current offset in the query; moved past the opening delimiter
 * @param[IN/OUT] body_size - on input the size of the region starting at the opening delimiter, on output the size
 *  of the text between the two delimiters
 * @return true on success; false for NULL arguments, for a region too short to hold two delimiters and when no
 *  closing delimiter is found
 */
bool FuncHardcodedValues::process_def_process_delimiter_hyphen(size_t *delimiter_size, const char *body_start,
    size_t *body_location, size_t *body_size)
{
    if (!delimiter_size || !body_start || !body_location || !body_size) {
        Assert(false);
        return false;
    }

    /*
     * the region has to hold at least the opening and the closing delimiter.
     * with less than that the backward scan below would start on (or before) the opening delimiter
     * and the size arithmetic at the end would wrap around.
     */
    if (*body_size < 2) {
        return false;
    }

    const char *body_start_before = body_start;
    const char *body_end = body_start + *body_size - 1;
    const char *body_end_before = body_end;

    /* skip over the opening hyphen */
    ++body_start;

    /* walk backwards from the end of the region until the closing hyphen is found */
    while (*body_end != '\'') {
        /*
         * the check is done before stepping back so the scan can never reach the opening hyphen:
         * the first character after the opening hyphen is the last position that may hold the closing one
         */
        if (body_end <= body_start) {
            Assert(false);
            return false;
        }
        --body_end;
        *delimiter_size -= 1;
    }
    /* skip over the closing hyphen */
    --body_end;

    /* update location and size: drop the opening hyphen in front and everything from the closing hyphen onwards */
    *body_location += (body_start - body_start_before);
    *body_size -= (body_start - body_start_before) + (body_end_before - body_end);
    return true;
}

/*
 * @description narrow the body down to the text between the dollar delimiters ($$ or $TAG$)
 * @param[IN] delimiter_size - only checked for NULL, this function does not modify it
 * @param[IN] body_start - a pointer to the query at body_location; must be a NUL terminated string
 * @param[IN/OUT] body_location - the current offset in the query; moved to just after the opening delimiter
 * @param[OUT] body_size - set to the number of characters between the opening and the closing delimiter
 * @return true on success; false for NULL arguments, when no complete opening delimiter exists or when the closing
 *  delimiter is missing
 */
bool FuncHardcodedValues::process_def_process_delimiter_dollar(size_t *delimiter_size, const char *body_start,
    size_t *body_location, size_t *body_size)
{
    if (!delimiter_size || !body_start || !body_location || !body_size) {
        Assert(false);
        return false;
    }

    /* the opening delimiter runs from the first '$' up to and including the next '$' */
    const char *delimiter_begin_1 = strchr(body_start, '$');
    const char *delimiter_begin_2 = NULL;
    if (delimiter_begin_1 != NULL) {
        delimiter_begin_2 = strchr(delimiter_begin_1 + 1, '$');
    }
    if (delimiter_begin_1 == NULL || delimiter_begin_2 == NULL) {
        Assert(false);
        return false;
    }

    /*
     * in this specific case we update the body_size, in all other cases we use the default which is until the END
     * AS $OPTIONAL$
     * $OPTIONAL$
     * if there's a $$ present then the query definition starts with it
     * skip the $$ (or the $BODY$) in the query definition
     */

    /*
     * the tag is compared in place, directly against the opening delimiter inside the query.
     * copying it into a fixed size buffer would limit the tag length and would leave the copy without a
     * terminating NUL when the tag fills the buffer exactly.
     */
    const size_t delimiter_tag_size = delimiter_begin_2 - delimiter_begin_1 + 1;

    /* increment the body_location offset after the end of the delimiter */
    *body_location += (delimiter_begin_2 + 1) - body_start;

    /*
     * locate the enclosing delimiter $BODY$: the first place after the opening delimiter where the complete tag
     * shows up again. the tag starts with '$' so only positions holding a '$' have to be compared.
     * strncmp stops at the terminating NUL, so a tag cut off by the end of the query never matches.
     */
    const char *delimiter_end = strchr(delimiter_begin_2 + 1, '$');
    while (delimiter_end != NULL && strncmp(delimiter_end, delimiter_begin_1, delimiter_tag_size) != 0) {
        delimiter_end = strchr(delimiter_end + 1, '$');
    }
    if (delimiter_end == NULL) {
        return false;
    }

    /*
     * the body_location is already updated to after the starting delimiter tag
     * so now all we need to do is set the body_size to before the enclosing delimiter tag
     */
    *body_size = delimiter_end - delimiter_begin_2 - 1;
    return true;
}

/*
 * @description detect which kind of delimiter the body starts with and update the body to the text between the
 *  delimiters
 * @param[IN/OUT] delimiter_size - the size of the query from the beginning of the opening delimiter to the end of the
 *  closing delimiter
 * @param[IN] body_start - a pointer to the query at body_location
 * @param[IN/OUT] body_location - the current offset in the query
 * @param[IN/OUT] body_size - the size of the query after the location
 * @param[IN/OUT] delimiter_type - set to the detected delimiter kind; left untouched when the body starts with
 *  neither a hyphen (single quote), a dollar sign nor the AS keyword
 * @return false for NULL arguments or when the delimiter specific processing failed, true otherwise
 */
bool FuncHardcodedValues::process_def_process_delimiters(size_t *delimiter_size, const char *body_start,
    size_t *body_location, size_t *body_size, DelimiterType *delimiter_type)
{
    /* delimiter_type is written and read below, so it has to be validated together with the other pointers */
    if (!delimiter_size || !body_start || !body_location || !body_size || !delimiter_type) {
        Assert(false);
        return false;
    }

    /* check delimiter type because we have different parsing for each */
    if (*body_start == '\'') {
        *delimiter_type = DELIMITER_TYPE_HYPHEN;
    } else if (*body_start == '$') {
        *delimiter_type = DELIMITER_TYPE_DOLLAR;
    } else if (pg_strncasecmp(body_start, "AS", strlen("AS")) == 0) {
        *delimiter_type = DELIMITER_TYPE_AS;
    }

    bool ret = true;
    if (*delimiter_type == DELIMITER_TYPE_HYPHEN) {
        /* ignore the hyphens wrapping the query */
        ret = process_def_process_delimiter_hyphen(delimiter_size, body_start, body_location, body_size);
    } else if (*delimiter_type == DELIMITER_TYPE_DOLLAR) {
        /* ignore the $$ / $TAG$ wrapping the query */
        ret = process_def_process_delimiter_dollar(delimiter_size, body_start, body_location, body_size);
    } else if (*delimiter_type == DELIMITER_TYPE_AS) {
        /*
         * CREATE PROCEDURE func() AS
         * the query definition starts with the AS keyword.
         * the AS keyword needs to be skipped as well
         */
        *body_location += strlen("AS");
        *body_size -= strlen("AS");
    }

    return ret;
}

/*
 * @description process the objects in the query generated by CREATE FUNCTION, CREATE PROCEDURE or DO
 * @param[in] as_item - the AS portion in the query in its DefElem form
 * @param[in] is_do_stmt - did the parser recognize this query as a DoStmt node
 * @param[in] language_item - the LANGUAGE portion in the query in its DefElem form
 * @param[out] body - the query body; points either into the original query or at the string kept by the parser
 * @param[in/out] delimiter_location - the location or position of where the query opening delimiter starts in the
 *  CREATE FUNCTION query; moved past leading white space and a leading DO keyword
 * @param[in/out] delimiter_size - the query size from the opening delimiter to the enclosing delimiter
 * @param[out] body_location - the location or position of where the query body starts in the CREATE FUNCTION query
 * @param[out] body_size - the query body size
 * @param[out] language - the language used in the current function
 * @param[in] statement_data - the current client logic state machine for the query
 * @return boolean, returns true or false for severe, unexpected errors
 */
bool FuncHardcodedValues::process_def_elem_options(DefElem *as_item, bool is_do_stmt, DefElem *language_item,
    char **body, size_t *delimiter_location, size_t *delimiter_size, size_t *body_location, size_t *body_size,
    const char **language, const StatementData *statement_data)
{
    /* every pointer listed here is dereferenced further down, so all of them are validated up front */
    if (!as_item || !body || !delimiter_location || !delimiter_size || !body_location || !body_size || !language ||
        !statement_data) {
        Assert(false);
        return false;
    }

    /* retrieve the query body from the parser */
    const char *body_from_parser = process_def_body_parser(as_item);
    if (!body_from_parser) {
        return false;
    }

    /* retrieve the language as determined by the parser */
    *language = process_def_language(language_item, is_do_stmt);
    if (*language == NULL) {
        return false;
    }

    /* skip spaces in case there are any, so the body_start will point to the first meaningful character */
    const char *body_start =
        process_def_skip_spaces(statement_data->params.adjusted_query, delimiter_location, delimiter_size);
    if (!body_start) {
        return false;
    }

    /* check if a DO command is present in the beginning of the query, and skip it */
    body_start =
        process_def_skip_do_cmd(statement_data->params.adjusted_query, delimiter_location, delimiter_size, is_do_stmt);
    if (!body_start) {
        return false;
    }

    /* align the body_location and the body_size to reflect only the body definition itself */
    *body_location = *delimiter_location;
    *body_size = *delimiter_size;

    /* update the locations based on the specific delimiter that wraps the function query */
    DelimiterType delimiter_type = DELIMITER_TYPE_NONE;
    if (!process_def_process_delimiters(delimiter_size, body_start, body_location, body_size, &delimiter_type)) {
        return false;
    }

    /*
     * we set the real body to work with the original query.
     * the query pointer cannot be NULL at this point: the skip helpers above reject a NULL query.
     */
    *body = (char *)statement_data->params.adjusted_query + *body_location;

    /*
     * even though we know where the query starts,
     * in SQL - we cannot use the original query because it could be escaped with some escaping only relevant inside of
     * functions.
     * For example:
     * CREATE OR REPLACE FUNCTION insert_func300() RETURNS VOID AS
     * 'INSERT INTO accounts(name,balance) VALUES(''100'', 100);' LANGUAGE SQL;
     * the body from the parser is unescaped
     * we need to run the lexer before we pass the query to run_pre_query() and remove this code.
     */
    bool use_body_from_parser = false;
    if (pg_strncasecmp(*language, "plpgsql", strlen("plpgsql")) == 0 && delimiter_type == DELIMITER_TYPE_HYPHEN) {
        use_body_from_parser = true;
    } else if (pg_strncasecmp(*language, "sql", strlen("sql")) == 0) {
        use_body_from_parser = true;
    }

    if (use_body_from_parser) {
        *body = (char *)body_from_parser;
        *body_size = strlen(*body);
    }
    return true;
}

/**
 * @description process the query in CREATE FUNCTION, CREATE PROCEDURE or DO
 * @param[in] options - list of DefElem nodes - they are derieved either from the CreateFunctionStmt object or the
 *  DoStmt object
 * @param[in] is_do_stmt - did the parser recognize this query as a DoStmt node
 * @param[out] as_item - the AS portion in the query in its DefElem form
 * @param[out] language_item - the LANGUAGE portion in the query in its DefElem form
 * @param[out] delimiter_location - the location or position of where the query openining delimiter starts in the CREATE
 *  FUNCTION query
 * @param[out] delimiter_size - the query size from the opening delimiter to the enclosing delimiter
 * @param[out] StatementData - the current client logic state machine for the query
 * @return boolean, returns true or false for severe, unexpected errors
 */

bool FuncHardcodedValues::extract_def_elem_options(const List *options, bool is_do_stmt, DefElem **as_item,
    DefElem **language_item, size_t *delimiter_location, size_t *delimiter_size, const StatementData *statement_data)
{
    if (!options || !as_item || !language_item || !delimiter_location || !delimiter_size || !statement_data) {
        Assert(false);
        return false;
    }
    int as_location = 0;
    int language_location = 0;
    int next_to_as_location = INT_MAX;
    ListCell *option = NULL;

    /*
     * parse option twice. in order to calculate the real body length, we want to get the location of the attribute
     * after the "AS" option. so first time make sure we have the "AS" location, then take the minimal option location
     * that is bigger then "AS" location.
     */
    *as_item = NULL;
    *language_item = NULL;
    foreach (option, options) {
        DefElem *defel = (DefElem *)lfirst(option);
        if (pg_strncasecmp(defel->defname, "as", strlen("as")) == 0) {
            if (*as_item != NULL) {
                printfPQExpBuffer(&statement_data->conn->errorMessage,
                    libpq_gettext("ERROR(CLIENT): conflicting or redundant options\n"));
                return false;
            }
            *as_item = defel;
            as_location = defel->location;
        } else if (pg_strncasecmp(defel->defname, "language", strlen("language")) == 0) {
            if (*language_item != NULL) {
                printfPQExpBuffer(&statement_data->conn->errorMessage,
                    libpq_gettext("ERROR(CLIENT): conflicting or redundant options\n"));
                return false;
            }
            *language_item = defel;
            language_location = defel->location;
        }
    }

    /* if the LANGUAGE is after the AS, then use its location. otherwise, keep it as INT_MAX */
    if (as_location > 0 && language_location > as_location) {
        next_to_as_location = language_location;
    }

    /* bison parser failed to parse the CREATE FUNCTION/PROCEDURE properly. probably wrong input by user. */
    if ((*as_item == NULL) || (*language_item == NULL && is_do_stmt == false)) {
        return false;
    }

    if (next_to_as_location == INT_MAX || next_to_as_location < as_location) {
        next_to_as_location = strlen(statement_data->params.adjusted_query);
    }

    *delimiter_location = as_location;
    *delimiter_size = next_to_as_location - as_location;
    return true;
}

/*
 * @description a function written in the SQL language can be handed to the SQL parser as a whole,
 *  so the resulting bodies array holds exactly one element
 * @param[in] body - the query in the function definition
 * @param[in] body_size - the query size in the function definition
 * @param[in] body_location - the location/offset of where the actual query body is located in the CREATE FUNCTION
 *  statement
 * @param[out] bodies_size - the number of elements in the array (set to 1 on success)
 * @return array of bodies / sql queries, NULL for NULL arguments or when an allocation returned NULL
 */
FuncHardcodedValues::Body **FuncHardcodedValues::make_sql_body(const char *body, size_t body_size, size_t body_location,
    size_t *bodies_size)
{
    if (body == NULL || bodies_size == NULL) {
        Assert(false);
        return NULL;
    }

    /* the array with its single slot */
    Body **result = (Body **)feparser_malloc0(sizeof(Body *));
    if (!result) {
        return NULL;
    }

    /* the one and only entry */
    result[0] = (Body *)feparser_malloc0(sizeof(Body));
    Body *entry = result[0];
    if (!entry) {
        return NULL;
    }

    /* the entry carries a copy of the first body_size characters of the body and is always parsed */
    entry->to_parse = true;
    entry->body_location = body_location;
    entry->partial_body = feparser_strndup(body, body_size);

    *bodies_size = 1;
    return result;
}

/*
 * @description look at the character statement_end points to and, when it completes a statement, store that
 *  statement in bodies[body_idx]
 * @param[in] body - the beginning of the function body
 * @param[in] body_size - the size of the function body
 * @param[in] statement_begin - the first character of the statement that is currently being collected
 * @param[in] statement_end - the character that is currently being inspected
 * @param[in] body_location - the location/offset of the function body in the original query
 * @param[in/out] bodies - the array of statements, the slot at body_idx is filled when a statement is completed
 * @param[in] body_idx - the index of the slot to fill
 * @param[in/out] func_parse_info - the state of the splitter; is_split reports whether a statement was stored
 * @return the position of the inspected character (check_should_split receives its address and so may have
 *  changed it), or NULL for NULL arguments, when check_should_split fails or when the allocation of the
 *  statement entry fails
 */
const char *FuncHardcodedValues::parse_character(const char *body, size_t body_size, const char *statement_begin,
    const char *statement_end, size_t body_location, Body **bodies, size_t body_idx, FuncParseInfo *func_parse_info)
{
    if (!body || !statement_begin || !statement_end || !bodies || !func_parse_info) {
        Assert(false);
        return NULL;
    }

    /* maintain state machine for strings: every quote character flips the in_string flag */
    if (*statement_end == '\'' || *statement_end == '"') {
        func_parse_info->in_string = !func_parse_info->in_string;
    }

    /* condition blocks support */
    func_parse_info->is_parse = true;
    func_parse_info->is_split = false;
    if (func_parse_info->in_string == true) {
        /* nothing is split while inside of a string */
        return statement_end;
    }
    if (!check_should_split(&statement_end, func_parse_info)) {
        return NULL;
    }

    /* if end of statement then add to bodies array */
    bool is_eof = ((size_t)(statement_end - body) + 1 == body_size);
    if ((*statement_end == ';' || is_eof || func_parse_info->is_split) && !func_parse_info->in_string) {
        func_parse_info->is_split = true;
        bodies[body_idx] = (Body *)feparser_malloc0(sizeof(Body));
        if (bodies[body_idx] == NULL) {
            /* same handling of a failed allocation as in the other places that build the bodies array */
            return NULL;
        }
        bodies[body_idx]->to_parse = func_parse_info->is_parse;

        /*
         * the body_location specifies where in the original query we need to re-write
         * we increment the body_location to the beginning of the statement
         */
        bodies[body_idx]->body_location = body_location + (statement_begin - body);

        /*
         * strdup the statement so we can re-write it on its own (separately from the original query)
         * then we will just overwrite the statement over the specific location in the original query
         */
        bodies[body_idx]->partial_body = feparser_strndup(statement_begin, statement_end - statement_begin + 1);
        char *partial_body = bodies[body_idx]->partial_body;

        /*
         * handle cases where sql queries are embedded inside plpgsql special commands:
         * skip_return_query returns a position inside of the copied statement, so the location in the original
         * query is moved forward by the same distance
         */
        if (func_parse_info->is_parse) {
            partial_body = skip_return_query(bodies[body_idx]->partial_body);
            bodies[body_idx]->body_location += partial_body - bodies[body_idx]->partial_body;
            bodies[body_idx]->partial_body = partial_body;
        }
    }

    return statement_end;
}

/*
 * @description when parsing a function written in the PL/PGSQL language, we have to split it to an array of
 *  statements because we can only parse SQL statements so we need to find the statements that are really relevant
 *  for processing
 * @param[in] body - the body/query in the function definition
 * @param[in] body_size - the body/query size
 * @param[in] body_location - the location/offset of where the actual query body is located in the CREATE FUNCTION
 *  statement
 * @param[out] bodies_size - the number of elements in the array
 * @return array of bodies / sql queries, NULL for NULL arguments or when the array could not be allocated or grown
 */
FuncHardcodedValues::Body **FuncHardcodedValues::split_body(char *body, const size_t body_size, size_t body_location,
    size_t *bodies_size)
{
    if (!body || !bodies_size) {
        Assert(false);
        return NULL;
    }

    /*
     * allocate the bodies array using a basic estimate
     * count_semi_colons counts the number of syntaxes with semicolons
     * and m_BODIES_EXTRA_SIZE is the base number for control blocks, if it is not enough, will feparser_realloc it
     */
    size_t est_bodies_size = count_semi_colons(body) + m_BODIES_EXTRA_SIZE;
    Body **bodies = (Body **)feparser_malloc0((est_bodies_size + 1) * sizeof(Body *));
    if (bodies == NULL) {
        return NULL;
    }

    const char *statement_begin = body;
    const char *statement_end = statement_begin;
    size_t body_idx = 0;
    FuncParseInfo func_parse_info;
    func_parse_info.in_string = false;
    func_parse_info.in_for_loop = false;

    /*
     * loop and parse Body syntax, and split the syntax by semicolons, quotation marks, control blocks, etc
     * in parse_character function.
     */
    while ((size_t)(statement_end - body) < body_size) {
        /*
         * the original array was calculated based simply on the number of semi-colons
         * we may need to expand the array for control blocks (IF, THEN, etc.);
         */
        if (body_idx == est_bodies_size) {
            est_bodies_size += m_BODIES_EXTRA_SIZE;
            Body **grown_bodies = (Body **)feparser_realloc(bodies, (est_bodies_size + 1) * sizeof(Body *));
            if (grown_bodies == NULL) {
                /*
                 * without this check the rest of the loop would run with a NULL array,
                 * which parse_character rejects for every single character
                 */
                return NULL;
            }
            bodies = grown_bodies;
        }

        func_parse_info.is_split = false;
        const char *statement_end_ret = parse_character(body, body_size, statement_begin, statement_end, body_location,
            bodies, body_idx, &func_parse_info);
        if (statement_end_ret == NULL) {
            /* ERROR: ignore this character and carry on with the next one */
            ++statement_end;
            continue;
        }
        statement_end = statement_end_ret;
        if (func_parse_info.is_split && !func_parse_info.in_string) {
            /*
             * the end of the string was copied together (that's why all of the +1),
             * so we need to increment for the next string
             */
            statement_begin = statement_end;
            body_idx++;
            ++statement_begin;
        }
        ++statement_end;
    }

    *bodies_size = body_idx;
    return bodies;
}

/*
 * @description parse and process the SQL Query (in SQL it will be a batch, in PL/PGSQL it will be a single statement)
 * @param[in] language - the language of the body definition in the function
 * @param[in] body - a single statement or a batch containing information about the query
 * @param[in] delimiter_location - the location or position of where the query openining delimiter starts in the CREATE
 *  FUNCTION query
 * @param[in] delimiter_size - the query size from the opening delimiter to the enclosing delimiter
 * @param[INOUT] body_location_offset - the current offset after the processing of the data - because the query was
 *  re-written and the offsets were moved
 * @param[IN] - statement_data - state machine of the client logic current query
 * @return boolean, returns true or false for severe, unexpected errors
 */
bool FuncHardcodedValues::parse_body(const char *language, Body *body, size_t delimiter_location, size_t delimiter_size,
    size_t *body_location_offset, StatementData *statement_data)
{
    if (!body || !body_location_offset || !statement_data) {
        Assert(false);
        return false;
    }

    if (!body->to_parse) {
        return true;
    }

    /* prepare temporary statement based on the internal body in the function */
    const char *partial_body = body->partial_body;
    size_t partial_body_size = strlen(partial_body);
    StatementData partial_stmt_data(statement_data->conn, partial_body);

    bool failed_to_parse = false;
    if (!Processor::run_pre_query(&partial_stmt_data, true, &failed_to_parse) || failed_to_parse) {
        /*
         * even though we failed to parse a specific query, we cannot know if it was really so important
         * so we will ignore the error and continue anyway.
         */
        return true;
    }

    /* function body didn't replace any value in pre-processing */
    if (partial_stmt_data.params.adjusted_query_size == 0) {
        return true;
    }

    /* need to increment the body location originally calculated due to the re-write of previous statements */
    body->body_location += *body_location_offset;

    /*
     * after adjusting the function body with client-logic value,
     * adjust the original query with the new function body
     * statement_data: the original statement
     * partial_stmt_data.params.adjusted_query: function body
     */
    if (!adjust_original_stmt(language, statement_data, partial_stmt_data.params.adjusted_query,
        body->body_location, delimiter_location, delimiter_size, partial_body_size)) {
        fprintf(stderr, "failed to process function creation \n");
        return false;
    }

    /* calculate body_location offset due to statements re-writes */
    *body_location_offset += (partial_stmt_data.params.adjusted_query_size - partial_body_size);
    return true;
}

/*
 * @Description: replace the function body inside of the CREATE FUNCTION query with the adjusted body.
 * @param[in] language - the language of the body definition in the function
 * @param[IN] statement_data: StatementData object which holds the query to adjust.
 * @param[IN] adjusted_body: function body after adjusting the processed values.
 * @param[IN] body_location: position in the original query at which the replacement starts; ignored for a quoted
 *  SQL body, which is replaced starting from the delimiter location.
 * @param[in] delimiter_location - the location or position of where the query opening delimiter starts in the CREATE
 *  FUNCTION query
 * @param[in] delimiter_size - the query size from the opening delimiter to the enclosing delimiter
 * @param[IN] partial_body_size: size of the text that is replaced when the body is not a quoted SQL body
 * @return false for NULL arguments, when the quoted body could not be built or when the replacement reported 0;
 *  true otherwise
 */
bool FuncHardcodedValues::adjust_original_stmt(const char *language, StatementData *statement_data,
    const char *adjusted_body, size_t body_location, size_t delimiter_location, size_t delimiter_size,
    size_t partial_body_size)
{
    if (language == NULL || statement_data == NULL || adjusted_body == NULL) {
        Assert(false);
        return false;
    }

    /*
     * a SQL body that is wrapped by quotes is replaced together with its quotes.
     * the query is only inspected for SQL; '\0' stands for "no relevant opening character".
     */
    const bool is_sql = (pg_strncasecmp(language, "sql", strlen("sql")) == 0);
    const char opening_char = is_sql ? statement_data->params.adjusted_query[delimiter_location] : '\0';
    const bool is_dollar_quoted = is_sql && (opening_char == '$');
    const bool is_single_quoted = is_sql && (opening_char == '\'');
    const bool is_quoted_sql = is_dollar_quoted || is_single_quoted;

    size_t replacement_size = 0;
    char *replacement = NULL;
    size_t replaced_size = partial_body_size;
    if (is_quoted_sql) {
        /* build the new quoted body; the flag tells the helper whether the original used a dollar quote */
        if (!handle_body_quote(statement_data, adjusted_body, delimiter_location, is_dollar_quoted, &replacement_size,
            &replacement)) {
            return false;
        }
        body_location = delimiter_location;
        replaced_size = delimiter_size;
    } else {
        /* the adjusted body is used as is and takes the place of the original statement text */
        replacement_size = strlen(adjusted_body);
        replacement = (char *)adjusted_body;
    }

    /* put the replacement into the original query */
    const int replace_result =
        replace_original_query(statement_data, replacement_size, replacement, body_location, replaced_size);

    /* only the string that was built by handle_body_quote is released here */
    if (is_quoted_sql) {
        libpq_free(replacement);
    }
    return replace_result != 0;
}

/*
 * @Description: build a new body string wrapped in a dollar-quote. A single-quoted body is wrapped with
 *  EMPTY_DOLLAR_TAG; a dollar-quoted body keeps the tag it already has ($something$).
 * @param[IN] statement_data: StatementData object which holds the query to adjust.
 * @param[IN] adjusted_body: function body after adjusting the processed values.
 * @param[IN] delimiter_location: position of the opening delimiter of the body in
 *  statement_data->params.adjusted_query.
 * @param[IN] is_dollar: determine if body is dollar-quoted or a single-quote.
 * @param[OUT] new_size: length of the wrapped body (without the terminating null)
 * @param[OUT] new_str: the wrapped body, allocated with malloc; the caller owns it and must free it
 * @return: bool, true for success. false on invalid arguments, when the closing '$' of the tag is missing,
 *  when the tag does not fit the local tag buffer, or when the allocation fails. The output parameters are only
 *  written on success.
 */
bool FuncHardcodedValues::handle_body_quote(StatementData *statement_data, const char *adjusted_body,
    size_t delimiter_location, bool is_dollar, size_t *new_size, char **new_str)
{
    if (statement_data == NULL || statement_data->params.adjusted_query == NULL || adjusted_body == NULL ||
        new_size == NULL || new_str == NULL) {
        return false;
    }

    size_t tag_length = 0;
    char tag[128] = {0};

    /* create dollar_tag to wrap the body. use existing one if there is one, else the empty one */
    if (is_dollar) {
        const char *tag_begin = statement_data->params.adjusted_query + delimiter_location;
        const char *tag_end = strchr(tag_begin + 1, '$');
        if (tag_end == NULL) {
            return false;
        }

        /* the tag spans from the first dollar to the second dollar, both included */
        tag_length = (size_t)(tag_end - tag_begin) + 1;

        /* reject a tag that cannot fit (with its terminating null) instead of failing inside strncat_s */
        if (tag_length >= sizeof(tag)) {
            return false;
        }
        check_strncat_s(strncat_s(tag, sizeof(tag), tag_begin, tag_length));
    } else {
        tag_length = strlen(EMPTY_DOLLAR_TAG);
        check_strncat_s(strncat_s(tag, sizeof(tag), EMPTY_DOLLAR_TAG, tag_length));
    }

    /* new body needs one tag in the beginning and one in the end */
    const size_t body_length = strlen(adjusted_body);
    const size_t temp_size = body_length + tag_length * 2; /* 2 is the number of tags wrapping the body */
    char *temp_str = (char *)malloc(temp_size + 1);
    if (temp_str == NULL) {
        return false;
    }
    temp_str[0] = '\0';

    check_strncat_s(strncat_s(temp_str, temp_size + 1, tag, tag_length));
    check_strncat_s(strncat_s(temp_str, temp_size + 1, adjusted_body, body_length));
    check_strncat_s(strncat_s(temp_str, temp_size + 1, tag, tag_length));

    /* fill output parameters */
    *new_size = temp_size;
    *new_str = temp_str;
    return true;
}

/*
 * @Description: after getting all parameters for the replacement, construct the actual new query.
 *  The new query is: the first `location` characters of params.adjusted_query, followed by new_str, followed by
 *  whatever comes after the replaced region. On success params.new_query and params.adjusted_query both point
 *  to the newly allocated query, their sizes are updated, and the previous params.new_query buffer is freed.
 *  On failure statement_data is left untouched.
 * @param[IN] statement_data: StatementData object which holds the query to adjust.
 * @param[IN] new_size: size of the new body to put in the original query.
 * @param[IN] new_str: new body to put in the original query.
 * @param[IN] location: location where to start replace content in the original query
 * @param[IN] original_size_to_replace: size to replace in the original query starting from location
 * @return: int, new_size on success and 0 on failure (invalid arguments, a replaced region that does not lie
 *  inside the query, or allocation failure).
 *  NOTE(review): a successful replacement with new_size == 0 is indistinguishable from a failure.
 */
int FuncHardcodedValues::replace_original_query(StatementData *statement_data, size_t new_size, const char *new_str,
    int location, size_t original_size_to_replace)
{
    if (statement_data == NULL || statement_data->params.adjusted_query == NULL || new_str == NULL ||
        location < 0) {
        return 0;
    }

    const char *old_query = statement_data->params.adjusted_query;
    const size_t old_size = strlen(old_query);
    const size_t prefix_size = (size_t)location;

    /* the replaced region must lie inside the query, otherwise the tail length below would wrap around */
    if (prefix_size > old_size || original_size_to_replace > old_size - prefix_size) {
        return 0;
    }
    const size_t suffix_size = old_size - prefix_size - original_size_to_replace;
    const size_t rebuilt_size = prefix_size + new_size + suffix_size;

    /*
     * build the query in a local buffer first, so that an allocation failure
     * does not overwrite (and lose) the buffer currently held in params.new_query
     */
    char *rebuilt = (char *)malloc(rebuilt_size + 1);
    if (rebuilt == NULL) {
        return 0;
    }
    rebuilt[0] = '\0';

    /* concatenate data: prefix + new body + suffix */
    check_strncat_s(strncat_s(rebuilt, rebuilt_size + 1, old_query, prefix_size));
    check_strncat_s(strncat_s(rebuilt, rebuilt_size + 1, new_str, new_size));
    check_strncat_s(strncat_s(rebuilt, rebuilt_size + 1, old_query + prefix_size + original_size_to_replace,
        suffix_size));

    /*
     * publish the new query. the previous buffer is released only now because
     * old_query may point into it and was still being read above.
     */
    char *previous_query = statement_data->params.new_query;
    statement_data->params.new_query = rebuilt;
    statement_data->params.new_query_size = rebuilt_size;
    libpq_free(previous_query);
    statement_data->params.adjusted_query = statement_data->params.new_query;
    statement_data->params.adjusted_query_size = statement_data->params.new_query_size;
    return (int)new_size;
}

/*
 * @Description: recognize the PL/PGSQL keyword that `str` begins with and step over it.
 *  "if ", "end ", "for " and "while " (keyword followed by a space) raise the matching flag in func_parse_info.
 *  While in_for_loop is set, a leading "loop" ends the statement that precedes it: the flags are set so that
 *  this statement is split and parsed, *is_exit is raised and the position just before "loop" is returned.
 *  Any other word from the skip list is stepped over; "begin" raises found_begin, the rest raise skip_to_endline.
 * @param[IN] str: the remaining text, expected to start at a non-whitespace character
 * @param[IN/OUT] func_parse_info: parsing flags updated according to the keyword found
 * @param[OUT] is_exit: set to true when the caller should return right away with the returned position
 * @return the position after the recognized keyword, or str itself when no keyword was recognized
 */
const char *FuncHardcodedValues::find_keyword_beginning(const char *str, FuncParseInfo *func_parse_info, bool *is_exit)
{
    static const char* skip_words[] = {"else", "elsif", "case", "loop", "when", "exception", "begin", "end", "declare"};
    static const size_t skip_words_len = sizeof(skip_words) / sizeof(*skip_words);

    /* keywords which open a control statement, each must be followed by a space */
    if (pg_strncasecmp(str, "if ", strlen("if ")) == 0) {
        func_parse_info->found_if = true;
        return str + strlen("if ");
    }
    if (pg_strncasecmp(str, "end ", strlen("end ")) == 0) {
        func_parse_info->found_end = true;
        return str + strlen("end ");
    }
    if (pg_strncasecmp(str, "for ", strlen("for ")) == 0) {
        func_parse_info->for_loop = true;
        return str + strlen("for ");
    }
    if (pg_strncasecmp(str, "while ", strlen("while ")) == 0) {
        func_parse_info->while_loop = true;
        return str + strlen("while ");
    }

    /*
     * we are in a FOR r IN ...
     * there could be a statement before the loop.
     * hand back that statement now, the LOOP keyword itself is processed in the next iteration
     */
    if (func_parse_info->in_for_loop && pg_strncasecmp(str, "loop", strlen("loop")) == 0) {
        func_parse_info->in_for_loop = false;

        /* the statement can be split and parsed */
        func_parse_info->is_split = true;
        func_parse_info->is_parse = true;
        *is_exit = true;

        /* the delimiter is the character right before the LOOP keyword */
        return str - 1;
    }

    /* words which are stepped over, the first one matching wins */
    const char *const *word = skip_words;
    const char *const *words_end = skip_words + skip_words_len;
    while (word != words_end) {
        const size_t word_len = strlen(*word);
        if (pg_strncasecmp(str, *word, word_len) == 0) {
            if (pg_strncasecmp(str, "begin", strlen("begin")) == 0) {
                func_parse_info->found_begin = true;
            } else {
                func_parse_info->skip_to_endline = true;
            }
            return str + word_len;
        }
        ++word;
    }

    return str;
}

/*
 * @Description: after a control keyword was recognized at the beginning of the statement, look for the word
 *  which closes its header (IF ... THEN, END IF, WHILE ... LOOP, FOR ... IN) and step over it.
 *  The word is looked up with find_word in the text starting at str.
 * @param[IN] str: the text following the opening keyword
 * @param[IN/OUT] func_parse_info: found_if / found_end / while_loop / for_loop select the word to look for;
 *  skip_to_endline, in_for_loop, is_split and is_parse are updated according to the outcome
 * @param[OUT] is_exit: set to true when the caller should return right away with the returned position
 * @return the position right after the closing word when it was found, otherwise str unchanged
 */
const char *FuncHardcodedValues::find_keyword_ending(const char *str, FuncParseInfo *func_parse_info, bool *is_exit)
{
    const char *pos = NULL;
    if (func_parse_info->found_if) {
        pos = find_word(str, "then");
        if (pos == NULL) {
            /*
             * there is an IF statement but no standalone THEN was found for it.
             * we don't support this use case so we are forced to regard this as if this is not an IF statement,
             * so just continue parsing.
             */
            func_parse_info->is_split = false;
            *is_exit = true;
            return str;
        }

        /* step over the THEN keyword itself, the same way the other branches step over their keyword */
        pos += strlen("then");
        func_parse_info->skip_to_endline = true;
    } else if (func_parse_info->found_end) {
        pos = find_word(str, "if");
        if (pos == NULL) {
            /* an END which is not an END IF, nothing to step over */
            *is_exit = true;
            return str;
        }
        pos += strlen("if");
        func_parse_info->skip_to_endline = true;
    } else if (func_parse_info->while_loop) {
        pos = find_word(str, "loop");
        if (pos == NULL) {
            /*
             * there is a WHILE statement but no standalone LOOP was found for it.
             * we don't support this use case so we are forced to regard this as if this is not a WHILE statement,
             * so just continue parsing.
             */
            func_parse_info->is_split = false;
            *is_exit = true;
            return str;
        }
        pos += strlen("loop");
        func_parse_info->skip_to_endline = true;
    } else if (func_parse_info->for_loop) {
        pos = find_word(str, "in");
        if (pos == NULL) {
            /*
             * there is a FOR statement but no standalone IN was found for it.
             * we don't support this use case so we are forced to regard this as if this is not a FOR statement,
             * so just continue parsing.
             */
            func_parse_info->is_split = false;
            *is_exit = true;
            return str;
        }
        pos += strlen("in");
        func_parse_info->in_for_loop = true;

        /* split the statement like "FOR r IN" and don't parse it */
        func_parse_info->is_split = true;
        func_parse_info->is_parse = false;
        *is_exit = true;
    }

    /* pos is only set when one of the closing words was found and stepped over */
    if (pos != NULL) {
        str = pos;
    }
    return str;
}

/*
 * @description splitting PL/PGSQL to multiple queries is done by parsing the control blocks (IF, FOR, etc.)
 * the purpose of this function is to distill the "real" SQL queries hidden inside the function definition
 * only these sql queries should be passed to the sql parser
 * @param[in,out] str - the remaining query definition. when the function decides on a split position
 * (or a helper asks to exit early), *str is moved to that position; otherwise it is left unchanged
 * @param[in,out] func_parse_info - parsing state. found_begin, found_if, found_end, skip_to_endline,
 * while_loop and for_loop are reset on every call; in_for_loop is carried over between calls.
 * on return, is_split tells whether the query parsed until now should be considered as a separate query
 * and is_parse tells whether that query should be passed to an SQL parser or just be ignored
 * @return false on invalid arguments or when the end of the text was reached, true otherwise
 */
bool FuncHardcodedValues::check_should_split(const char **str, FuncParseInfo *func_parse_info)
{
    if (!str || !*str || !func_parse_info) {
        Assert(false);
        return false;
    }

    /* per-call flags, in_for_loop is deliberately not reset here */
    func_parse_info->found_begin = false;
    func_parse_info->found_if = false;
    func_parse_info->found_end = false;
    func_parse_info->skip_to_endline = false;
    func_parse_info->while_loop = false;
    func_parse_info->for_loop = false;
    const char *ret = *str;

    /*
     * skip whitespace.
     * the cast is required: passing a negative char (e.g. a byte of a multi-byte character) to isspace is
     * undefined behavior
     */
    while (*ret != '\0' && isspace((unsigned char)*ret)) {
        ++ret;
    }
    if (*ret == '\0') {
        return false;
    }

    bool is_exit = false; /* is_exit is the flag to return, if it is true, return it */
    ret = find_keyword_beginning(ret, func_parse_info, &is_exit);
    if (ret == NULL || *ret == '\0') {
        return false;
    }
    if (is_exit) {
        *str = ret;
        return true;
    }

    /* the keyword did not already ask to skip the rest of the line, look for the word closing its header */
    if (!func_parse_info->skip_to_endline) {
        ret = find_keyword_ending(ret, func_parse_info, &is_exit);
    }
    if (is_exit) {
        *str = ret;
        return true;
    }

    if (func_parse_info->skip_to_endline) {
        while ((*ret != '\0') && (*ret != '\n')) {
            ret++;
        }
    }
    if (*ret == '\0') {
        return false;
    }

    /* a control keyword was consumed: split here, but there is nothing for the SQL parser in it */
    if (func_parse_info->skip_to_endline || func_parse_info->found_begin) {
        *str = ret;
        func_parse_info->is_split = true;
        func_parse_info->is_parse = false;
        return true;
    }

    /* no control keyword here, *str stays where it was */
    func_parse_info->is_split = false;
    return true;
}

/*
 * @Description count the ';' characters in a null-terminated string.
 * this is used to give a rough estimate of the number of SQL queries in a PL/PGSQL block.
 * the estimate is not accurate, so an array sized by it may still have to be reallocated.
 * @param[in] str - function query definition
 * @return number of semicolons found, i.e. the estimated number of queries
 */
const size_t FuncHardcodedValues::count_semi_colons(const char *str)
{
    size_t total = 0;
    for (const char *cursor = str; *cursor != '\0'; ++cursor) {
        if (*cursor == ';') {
            ++total;
        }
    }
    return total;
}

/*
 * @Description: skip the leading whitespace of a null-terminated string.
 * @param[in] str - the string to scan, may be NULL
 * @return pointer to the first non-whitespace character, or NULL when str is NULL, empty, or holds
 *  nothing but whitespace
 */
char *skip_space(char *str)
{
    if (str == NULL) {
        return NULL;
    }

    /* the cast is required: passing a negative char to isspace is undefined behavior */
    while ((*str != '\0') && isspace((unsigned char)*str)) {
        ++str;
    }
    if (*str == '\0') {
        return NULL;
    }
    return str;
}

/*
 * @Description extracting a query from within a RETURN QUERY(<sql statement>) block, and other similar scenarios
 * RETURN QUERY()
 * RETURN QUERY EXECUTE()
 * EXECUTE()
 * EXECUTE IMMEDIATE()
 * For the EXECUTE forms the statement is expected to be a quoted command string: the opening quote is stepped
 * over and the string is cut at the last quote of the same kind. For the other forms an optional opening
 * parenthesis is stepped over and the string is cut at the last ')'.
 * @param[in] str - the duped query. this is the query to be used by the parser but not the query sent to the server.
 * it is modified in place (a null terminator may be written into it).
 * @return the str pointer adjusted to the location of where the query statement begins. it's also null terminated in
 * the end of the sql statement. NULL is returned only when str is NULL. when str does not start with RETURN or
 * EXECUTE, or only whitespace follows the keyword, the original pointer is returned.
 */
char *FuncHardcodedValues::skip_return_query(char *str)
{
    RETURN_IF(str == NULL, NULL);
    char *ret = str;

    /* skip whitespace */
    str = skip_space(str);
    RETURN_IF(str == NULL, ret);

    /* skip RETURN / EXECUTE */
    bool is_return = false;
    bool is_execute = false;
    if (pg_strncasecmp(str, "return", strlen("return")) == 0) {
        str += strlen("return");
        is_return = true;
    } else if (pg_strncasecmp(str, "execute", strlen("execute")) == 0) {
        str += strlen("execute");
        is_execute = true;
    } else {
        return ret;
    }

    /* skip whitespace */
    str = skip_space(str);
    RETURN_IF(str == NULL, ret);
    ret = str;

    /* skip QUERY (optional) */
    bool is_query_execute = false;
    if (is_return && pg_strncasecmp(str, "query", strlen("query")) == 0) {
        str += strlen("query");

        /* skip whitespace */
        str = skip_space(str);
        RETURN_IF(str == NULL, ret);

        /* skip EXECUTE (optional) */
        if (pg_strncasecmp(str, "execute", strlen("execute")) == 0) {
            str += strlen("execute");
            is_query_execute = true;
        }
    } else if (is_execute && pg_strncasecmp(str, "immediate", strlen("immediate")) == 0) {
        str += strlen("immediate");
    }

    str = skip_space(str);
    RETURN_IF(str == NULL, ret);
    ret = str;

    if (is_query_execute || is_execute) {
        /*
         * remove the quotes around the 'command string' in the query that we pass to the SQL parser.
         * the command string may be enclosed in " " with ' ' literals inside it (and the other way around),
         * so the closing quote has to be of the same kind as the opening one. otherwise a command string like
         * "... 'value'" would be cut at the quote of the inner literal.
         */
        const char quote = *str;
        if (quote != '\'' && quote != '"') {
            return ret;
        }
        ++str;
        ret = str;
        char *closing_quote = strrchr(str, quote);
        if (closing_quote != NULL) {
            *closing_quote = '\0';
        }
    } else if (*str == '(') {
        /* remove parentheses (OPTIONAL) */
        ++str;
        ret = str;
        char *closing_paren = strrchr(str, ')');
        if (closing_paren != NULL) {
            *closing_paren = '\0';
        }
    }

    /* EOF */
    return ret;
}

/*
 * @Description: case-insensitive search for `word` as a standalone word inside `sentence`.
 *  The word is standalone when it is preceded by whitespace or starts the sentence, and is followed by
 *  whitespace or ends the sentence.
 *  NOTE(review): only the first occurrence of `word` is examined. When that occurrence is part of a longer
 *  word (e.g. "in" inside "index"), NULL is returned even if a standalone occurrence exists later on.
 * @param[in] sentence - the text to search in
 * @param[in] word - the word to look for
 * @return pointer to the beginning of the word inside sentence, or NULL when it was not found as a
 *  standalone word (or when one of the arguments is NULL)
 */
const char *FuncHardcodedValues::find_word(const char *sentence, const char *word)
{
    if (sentence == NULL || word == NULL) {
        return NULL;
    }

    const char *result = strcasestr(sentence, word);
    /* if word was not found */
    if (result == NULL) {
        return NULL;
    }

    /*
     * if word is not in the beginning of the sentence and there's no space before the word.
     * the character before is only looked at when there is one, and it is cast because passing a negative
     * char to isspace is undefined behavior.
     */
    if (result != sentence && !isspace((unsigned char)result[-1])) {
        return NULL;
    }

    /* if word is not in the end of the sentence and there's no space after the word */
    const char *result_eof = result + strlen(word);
    if (*result_eof != '\0' && !isspace((unsigned char)*result_eof)) {
        return NULL;
    }

    /* result is definitely a separate word. */
    return result;
}
